#!/usr/bin/env python3
# Check all .po files, and make sure they don't have duplicate msgids in there
# (a duplicate may be a normal entry or a commented-out `#~` entry).
#
# As well as checking for files that have been wrapped, which they shouldn't be.
# Our extraction scripts make sure the files are extracted unwrapped, and we
# *should have configured* Weblate to do the same, but sometimes Weblate still wraps.
#
# Finally, check that the English file does not contain empty translations.
from os import path
import collections
import os
import re
import sys


def scan_pofiles(root, scanner):
    """Run `scanner` on every `.po` file below `root`.

    `scanner` is called with the full path of each file. Every file is scanned,
    even after a problem has been found, so that all problems get reported.

    Return True if the scanner reported a problem for any file.
    """
    found_problem = False
    for dirpath, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            if not filename.endswith('.po'):
                continue
            found_problem |= scanner(path.join(dirpath, filename))
    return found_problem


# First line of a msgid, either active or commented out (`#~ `).
PAT = re.compile(r'^(?:#~ )?msgid "(.*)"')

# Continuation line of a multi-line string, either active or commented out (`#~ `).
_CONTINUATION = re.compile(r'^(?:#~ )?"(.*)"')


def _read_msgid(lines, index, first_part):
    """Return the complete msgid starting at `lines[index]`.

    A msgid may be spread over several quoted lines; its value is the
    concatenation of all of them. `first_part` is the text already captured
    from the `msgid "..."` line itself.
    """
    parts = [first_part]
    index += 1
    while index < len(lines):
        m = _CONTINUATION.match(lines[index])
        if m is None:
            break
        parts.append(m.group(1))
        index += 1
    return ''.join(parts)


def check_for_duplicates(filename):
    """Report msgids that occur more than once in `filename`.

    Both active and commented-out (`#~ `) entries are counted. Multi-line
    msgids are compared by their complete text, so they are not confused with
    each other or with the header entry (whose msgid is empty).

    Prints a report for every duplicated msgid. Return True if any duplicate
    was found, False otherwise.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        lines = f.read().splitlines()

    # msgid -> 0-based line numbers of the `msgid` lines where it starts
    occurrences = collections.defaultdict(list)
    for i, line in enumerate(lines):
        m = PAT.match(line)
        if m:
            occurrences[_read_msgid(lines, i, m.group(1))].append(i)

    duplicates = [(msgid, linenrs) for msgid, linenrs in occurrences.items() if len(linenrs) > 1]
    if not duplicates:
        return False

    print('-' * 70)
    print(filename)
    print('-' * 70)

    for msgid, linenrs in duplicates:
        print(f"The translation of '{msgid}' occurs {len(linenrs)} times:")
        print('')
        for nr in linenrs:
            # Show the msgid line and the line after it; the latter may not
            # exist if the msgid is the very last line of the file.
            for shown in range(nr, min(nr + 2, len(lines))):
                print(f'    {shown + 1}: {lines[shown]}')
            print('')

    return True


def _end_of_header(lines):
    """Return the index of the first line after the header entry.

    The header is the first entry of the file when its msgid is empty
    (`msgid ""` directly followed by its `msgstr`). Return 0 when the file
    does not start with such an entry.
    """
    for index, line in enumerate(lines):
        if not line.startswith('msgid '):
            continue
        followed_by_msgstr = index + 1 < len(lines) and lines[index + 1].startswith('msgstr ')
        if line != 'msgid ""' or not followed_by_msgstr:
            # The first entry is a regular message, so there is no header to skip
            return 0
        end = index + 2
        while end < len(lines) and lines[end].startswith('"'):
            end += 1
        return end
    return 0


def check_for_wrapping(filename):
    r"""Check that strings in the source files are not wrapped, except at newlines.

    Wrapped entries start with `msgstr ""`; this is only allowed if all strings on subsequent
    lines (except the last one) end in `\n`.

    The header entry is skipped, wherever it ends. Prints one message per wrapped
    msgstr. Return True if any wrapped msgstr was found, False otherwise.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        lines = [x.strip() for x in f.read().splitlines()]

    any_failed = False

    # Below is a 2-state state machine. Find the next `msgstr ""`, then check that all
    # lines in it conform to the requirements.
    # Skip the header, which always contains a `msgstr ""` we're not interested in.
    nr = _end_of_header(lines)
    while nr < len(lines):
        if 'msgstr ""' not in lines[nr]:
            nr += 1
            continue

        msg_start = nr
        nr += 1

        # Accumulate all lines starting with `"`
        msg_lines = []
        while nr < len(lines) and lines[nr].startswith('"'):
            msg_lines.append(lines[nr])
            nr += 1

        # Check that all message lines except the last one end in `\n"`:
        if not all(x.endswith('\\n"') for x in msg_lines[:-1]):
            print(f'{path.relpath(filename)}:{msg_start + 1}: msgstr has been wrapped!')
            any_failed = True

    return any_failed


def _language_of(filename):
    """Return the name of the directory containing the `LC_*` directory of `filename`.

    Return None if the path has no `LC_*` component with a parent directory.
    """
    parts = filename.replace('\\', '/').split('/')
    for index in range(len(parts) - 1, 0, -1):
        if parts[index].startswith('LC_'):
            return parts[index - 1]
    return None


def check_for_empty_in_En(filename):
    """Check that strings in the English file are not empty.
    (Other languages can be empty as Weblate will fill them)

    A file counts as English when the directory holding its `LC_*` directory is
    named exactly `en`. A msgstr is empty when it is `msgstr ""` and is not
    followed by a continuation line (a line starting with a double quote). The
    header entry (empty msgid) is ignored.

    Prints a message for each empty string. Return True if any was found,
    False otherwise (always False for non-English files).
    """
    if _language_of(filename) != 'en':
        return False

    with open(filename, 'r', encoding='utf-8') as f:
        # Blank lines are dropped so the line before a msgstr is its msgid
        # (or the last continuation line of a multi-line msgid).
        lines = [x.strip() for x in f.read().splitlines() if x.strip() != '']

    any_failed = False
    for nr, line in enumerate(lines):
        if line != 'msgstr ""':
            continue

        previous_line = lines[nr - 1] if nr > 0 else ''
        if previous_line == 'msgid ""':
            # This is the header entry, not a translatable string
            continue

        # A multi-line msgstr also starts with `msgstr ""`, but is followed by
        # quoted continuation lines. Check tutorial_code_snippet for an example.
        has_continuation = nr + 1 < len(lines) and lines[nr + 1].startswith('"')
        if not has_continuation:
            any_failed = True
            print(f"String {previous_line} is empty!!")

    if any_failed:
        print("Empty string detected in en.po")

    return any_failed


if __name__ == '__main__':
    # The translations directory is located two levels above this script's directory.
    pofilespath = path.join(path.dirname(__file__), '..', '..', 'translations')

    # Run every check over all files (no short-circuiting), so that all
    # problems are printed in a single run.
    fail = False
    for check in (check_for_duplicates, check_for_wrapping, check_for_empty_in_En):
        fail |= scan_pofiles(pofilespath, check)

    if fail:
        print('Something went wrong with the gettext translation files.')
        print('See above for the problems.')
        print('')
        print('- If duplicates were found, please make sure every msgid occurs only once,')
        print('  not commented out, with the right translation.')
        print('- If lines were wrapped, run the extraction script again to remove wrapping')
        print('  and double-check the Weblate configuration.')
        print('- If empty strings were found, please fill in the missing msgstr in the')
        print('  English file; English strings must not be empty.')
        # Non-zero exit code so callers of this script can detect the failure
        sys.exit(1)
